#=============================================================================#
#
# PROJECT:        Who Pays for Peace?
# AUTHORS:        ** anonymized for review **
# CONTACT:        ** anonymized for review **
# LAST MODIFIED:  July 7, 2022
# 
#=============================================================================#
#
# This R file contains the code used to replicate the within-subjects analyses
# (difference in treatment effects T1 vs. T2)
# 
#=============================================================================#


# Initial settings ------------------------------------------------------------
# Set the working directory by clicking the ".Rproj" file
# Restart an R session before running this script

#rm(list=ls())
#getwd()

## Install and load all necessary packages ------------------------------------
# ipak function: install and load multiple R packages.
# check to see if packages are installed. Install them if they are not, then load them into the R session.

ipak <- function(pkg){  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if(length(new.pkg)) install.packages(new.pkg, dependencies = TRUE)
sapply(pkg, require, character.only = TRUE)
}

packages <- c("dplyr", "tidyverse", "ggplot2", "ggpubr", "lmtest", "sandwich") 
ipak(packages)


## Load the data --------------------------------------------------------------
mydata.t1 <- readRDS("data/T1-clean-data.rds")
mydata.t2 <- readRDS("data/T2-clean-data.rds")


## Inspect the data -----------------------------------------------------------
# attrition rates in manuscript
sum(mydata.t1$email == 1) 
sum(mydata.t1$email == 1) / nrow(mydata.t1)*100
nrow(mydata.t2) 
nrow(mydata.t2) / sum(mydata.t1$email == 1)*100


## Make panel data ------------------------------------------------------------
# add Wave 2 outcomes to Wave 1 dataset 
mydata.t1$compensation.t2 <- mydata.t2$compensation[match(mydata.t1$ID, mydata.t2$ID)]
mydata.t1$punishment.t2 <- mydata.t2$punishment[match(mydata.t1$ID, mydata.t2$ID)]

# wide panel data
mypanel <- mydata.t1 %>%
  dplyr::select(ID, 
         comp_condition, compensation, compensation.t2, 
         punish_condition, punishment, punishment.t2) %>%
  rename(compensation.t1 = compensation,
         punishment.t1 = punishment) %>%
  mutate(comp_diff = compensation.t2 - compensation.t1,
         punish_diff = punishment.t2 - punishment.t1) %>%
  drop_na(compensation.t2)
summary(mypanel)

# long panel data
mylongpanel <- reshape(as.data.frame(mypanel), sep = ".t", times = c(1, 2),
                       direction = "long",
                       varying = c("compensation.t1", "compensation.t2", 
                                   "punishment.t1", "punishment.t2"))
mylongpanel$time <- as.factor(mylongpanel$time)

# Do the means change over time? ----------------------------------------------

# plot preparations
pd <- position_dodge(.5) 
cols <- c("#440154", "#3b528b", "#21918c", "#5ec962", "#fdd525") 

## Compensations --------------------------------------------------------------
mypanel$comp_condition <- relevel(mypanel$comp_condition, ref = "Control")
lm_c_diff <- lm(comp_diff ~ 0 + comp_condition, data = mypanel)
lm_c_diff <- coeftest(lm_c_diff, vcov = vcovCL, cluster = ~ID)
lm_c_diff # differences over time per condition
t.test(mypanel$comp_diff) # is the overall mean difference equal to 0?

# means, sd, and CI per condition, compensations at time 1
sum_c.t1 <- mypanel %>% 
  group_by(comp_condition) %>%
  rename(condition = comp_condition)  %>%
  summarise(mean = mean(compensation.t1, na.rm = TRUE),
            sd = sd(compensation.t1, na.rm = TRUE),
            n = n()) %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         time = "Wave 1")

# means, sd, and CI per condition, compensations at time 2
sum_c.t2 <- mypanel %>% 
  group_by(comp_condition) %>%
  rename(condition = comp_condition)  %>%
  summarise(mean = mean(compensation.t2, na.rm = TRUE),
            sd = sd(compensation.t2, na.rm = TRUE),
            n = n()) %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         time = "Wave 2")

# means, sd, and CI per condition, both waves
sum_c.diff <- rbind(sum_c.t1, sum_c.t2)
sum_c.diff

# nicer labels
sum_c.diff$label[sum_c.diff$condition=="Armenia"] <- "Armenia\n(outgroup)"
sum_c.diff$label[sum_c.diff$condition=="Azerbaijan"] <- "Azerbaijan\n(ingroup)"
sum_c.diff$label[sum_c.diff$condition=="Both"] <- "Armenia +\nAzerbaijan"
sum_c.diff$label[sum_c.diff$condition=="Int. Comm."] <- "International\nCommunity"
sum_c.diff$label[sum_c.diff$condition=="Control"] <- "Control\nGroup"
sum_c.diff$label <- factor(sum_c.diff$label, levels = c("Armenia\n(outgroup)", "Azerbaijan\n(ingroup)", "Armenia +\nAzerbaijan", "International\nCommunity", "Control\nGroup"))
sum_c.diff

# plot for compensations
fig2A <- ggplot(sum_c.diff, aes(x = label, y = mean, colour = label, shape = time)) + 
  geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), width = .1, position = pd, size = 0.7) +
  geom_point(size = 3, position = pd) + 
  scale_y_continuous(limits = c(1, 5), breaks = round(seq(1, 5, by = 0.5),1)) +
  scale_color_manual(values = cols, guide = "none") +
  labs(title = "A. Monetary Compensations for Victims",
       x="", y="Mean Level of Support\n", 
       shape = "Time") +
  theme_bw() +
  theme(plot.title = element_text(face="bold"),
        axis.ticks.length = unit(.25, "cm"),
        axis.text.x = element_text(colour = cols, size = 12),
        axis.text.y = element_text(colour = "gray20", size = 12),
        axis.title = element_text(size = 13),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 13))
fig2A


## Punishments ----------------------------------------------------------------
mypanel$punish_condition <- relevel(mypanel$punish_condition, ref = "Control")
lm_p_diff <- lm(punish_diff ~ 0 + punish_condition, data = mypanel)
lm_p_diff <- coeftest(lm_p_diff, vcov = vcovCL, cluster = ~ID)
lm_p_diff # differences over time per condition
t.test(mypanel$punish_diff) # is the overall mean difference equal to 0?
t.test(mypanel[mypanel$punish_condition=="Control",]$punish_diff)

# means, sd, and CI per condition, compensations at time 1
sum_p.t1 <- mypanel %>% 
  group_by(punish_condition) %>%
  rename(condition = punish_condition)  %>%
  summarise(mean = mean(punishment.t1, na.rm = TRUE),
            sd = sd(punishment.t1, na.rm = TRUE),
            n = n()) %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         time = "Wave 1")

# means, sd, and CI per condition, compensations at time 2
sum_p.t2 <- mypanel %>% 
  group_by(punish_condition) %>%
  rename(condition = punish_condition)  %>%
  summarise(mean = mean(punishment.t2, na.rm = TRUE),
            sd = sd(punishment.t2, na.rm = TRUE),
            n = n()) %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         time = "Wave 2")

# means, sd, and CI per condition, both waves
sum_p.diff <- rbind(sum_p.t1, sum_p.t2)
sum_p.diff

# nicer labels
sum_p.diff$label[sum_p.diff$condition=="Armenia"] <- "Armenia\n(outgroup)"
sum_p.diff$label[sum_p.diff$condition=="Azerbaijan"] <- "Azerbaijan\n(ingroup)"
sum_p.diff$label[sum_p.diff$condition=="Both"] <- "Armenia +\nAzerbaijan"
sum_p.diff$label[sum_p.diff$condition=="Int. Comm."] <- "International\nCommunity"
sum_p.diff$label[sum_p.diff$condition=="Control"] <- "Control\nGroup"
sum_p.diff$label <- factor(sum_p.diff$label, levels = c("Armenia\n(outgroup)", "Azerbaijan\n(ingroup)", "Armenia +\nAzerbaijan", "International\nCommunity", "Control\nGroup"))
sum_p.diff

# plot for compensations
cols <- c("#440154", "#3b528b", "#21918c", "#fdd525") 
fig2B <- ggplot(sum_p.diff, 
                aes(x = label, y = mean, colour = factor(label), shape = time)) +
  geom_errorbar(aes(ymin = lower.ci, ymax = upper.ci), 
                width = .1, position = pd, size = 0.7) +
  geom_point(size = 3, position = pd) + 
  scale_y_continuous(limits = c(1, 5), breaks = round(seq(1, 5, by = 0.5),1)) +
  scale_color_manual(values = cols, guide = "none") +
  labs(title = "B. War Crime Punishments",
       x="\nExperimental Condition", y="Mean Level of Support\n", 
       shape = "Time") +
  theme_bw() +
  theme(plot.title = element_text(face="bold"),
        axis.ticks.length = unit(.25, "cm"),
        axis.text.x = element_text(colour = cols, size = 12),
        axis.text.y = element_text(colour = "gray20", size = 12),
        axis.title = element_text(size = 13),
        legend.text = element_text(size = 12),
        legend.title = element_text(size = 13))
fig2B 


### Figure 2 ----------
#open plot environment
jpeg("figures/fig2.jpg", width = 8, height = 8, units='in', res = 300)
#arrange plots
ggarrange(fig2A, fig2B, 
          common.legend = TRUE,
          legend = "right",
          ncol = 1, nrow = 2)
#save plot
dev.off()


# Do the ATEs change over time? -----------------------------------------------

## Compensations --------------------------------------------------------------
mylongpanel$comp_condition <- relevel(mylongpanel$comp_condition, ref = "Control")
lm_c_atediff <- lm(compensation ~ comp_condition*time,
                   data = mylongpanel)
lm_c_atediff <- coeftest(lm_c_atediff, vcov = vcovCL, cluster = ~id) #cluster SEs by individual
lm_c_atediff

summary(lm(compensation ~ comp_condition, 
           data = mylongpanel[mylongpanel$time==1,]))
summary(lm(compensation ~ comp_condition, 
           data = mylongpanel[mylongpanel$time==2,]))


## Punishments ----------------------------------------------------------------
mylongpanel$punish_condition <- relevel(mylongpanel$punish_condition, ref = "Control")
lm_p_atediff <- lm(punishment ~ punish_condition*time,
                   data = mylongpanel)
lm_p_atediff <- coeftest(lm_p_atediff, vcov = vcovCL, cluster = ~id) #cluster SEs by individual
lm_p_atediff

summary(lm(punishment ~ punish_condition, 
           data = mylongpanel[mylongpanel$time==1,]))
summary(lm(punishment ~ punish_condition, 
           data = mylongpanel[mylongpanel$time==2,]))
